{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.chdir('../')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import DeepPurpose.models as models\n",
    "from DeepPurpose.utils import *\n",
    "from DeepPurpose.dataset import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Beginning Processing...\n",
      "Beginning to extract zip file...\n",
      "Done!\n",
      "in total: 118254 drug-target pairs\n",
      "encoding drug...\n",
      "unique drugs: 2068\n",
      "drug encoding finished...\n",
      "encoding protein...\n",
      "unique target sequence: 229\n",
      "-- Encoding AAC takes time. Time Reference: 24s for ~100 sequences in a CPU. Calculate your time by the unique target sequence #, instead of the entire dataset.\n",
      "protein encoding finished...\n",
      "splitting dataset...\n",
      "Done.\n",
      "cost about 219 seconds\n"
     ]
    }
   ],
   "source": [
    "from time import time\n",
    "\n",
    "t1 = time()\n",
    "X_drug, X_target, y = load_process_KIBA('./data/', binary=False)\n",
    "\n",
    "drug_encoding = 'MPNN'\n",
    "target_encoding = 'AAC'\n",
    "train, val, test = data_process(X_drug, X_target, y, \n",
    "                                drug_encoding, target_encoding, \n",
    "                                split_method='random',frac=[0.7,0.1,0.2])\n",
    "\n",
    "# use the parameters setting provided in the paper: https://arxiv.org/abs/1801.10193\n",
    "config = generate_config(drug_encoding = drug_encoding, \n",
    "                         target_encoding = target_encoding, \n",
    "                         cls_hidden_dims = [1024,1024,512], \n",
    "                         train_epoch = 100, \n",
    "                         test_every_X_epoch = 10, \n",
    "                         LR = 0.001, \n",
    "                         batch_size = 128,\n",
    "                         hidden_dim_drug = 128,\n",
    "                         mpnn_hidden_size = 128,\n",
    "                         mpnn_depth = 3, \n",
    "                         cnn_target_filters = [32,64,96],\n",
    "                         cnn_target_kernels = [4,8,12]\n",
    "                        )\n",
    "model = models.model_initialize(**config)\n",
    "t2 = time()\n",
    "print(\"cost about \" + str(int(t2-t1)) + \" seconds\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Let's use CPU/s!\n",
      "--- Data Preparation ---\n",
      "--- Go for Training ---\n",
      "Training at Epoch 1 iteration 0 with loss 139.375. Total time 0.00083 hours\n",
      "Training at Epoch 1 iteration 100 with loss 0.99343. Total time 0.02472 hours\n",
      "Training at Epoch 1 iteration 200 with loss 0.83609. Total time 0.04777 hours\n",
      "Training at Epoch 1 iteration 300 with loss 0.96589. Total time 0.07444 hours\n",
      "Training at Epoch 1 iteration 400 with loss 0.85949. Total time 0.1 hours\n",
      "Training at Epoch 1 iteration 500 with loss 0.54122. Total time 0.13111 hours\n",
      "Training at Epoch 1 iteration 600 with loss 0.58168. Total time 0.16777 hours\n",
      "Validation at Epoch 1 , MSE: 0.57232 , Pearson Correlation: 0.43984 with p-value: 0.0 , Concordance Index: 0.68495\n",
      "Training at Epoch 2 iteration 0 with loss 0.42175. Total time 0.18944 hours\n",
      "Training at Epoch 2 iteration 100 with loss 0.59724. Total time 0.22166 hours\n",
      "Training at Epoch 2 iteration 200 with loss 0.67120. Total time 0.25972 hours\n",
      "Training at Epoch 2 iteration 300 with loss 0.66331. Total time 0.285 hours\n",
      "Training at Epoch 2 iteration 400 with loss 0.86964. Total time 0.31 hours\n",
      "Training at Epoch 2 iteration 500 with loss 0.63331. Total time 0.33472 hours\n",
      "Training at Epoch 2 iteration 600 with loss 0.72784. Total time 0.36166 hours\n",
      "Validation at Epoch 2 , MSE: 0.53912 , Pearson Correlation: 0.48472 with p-value: 0.0 , Concordance Index: 0.69957\n",
      "Training at Epoch 3 iteration 0 with loss 0.53930. Total time 0.38305 hours\n",
      "Training at Epoch 3 iteration 100 with loss 0.52499. Total time 0.40972 hours\n",
      "Training at Epoch 3 iteration 200 with loss 1.20812. Total time 0.43444 hours\n",
      "Training at Epoch 3 iteration 300 with loss 1.05639. Total time 0.45888 hours\n",
      "Training at Epoch 3 iteration 400 with loss 0.94682. Total time 0.48416 hours\n",
      "Training at Epoch 3 iteration 500 with loss 0.61475. Total time 0.50861 hours\n",
      "Training at Epoch 3 iteration 600 with loss 0.68008. Total time 0.53305 hours\n",
      "Validation at Epoch 3 , MSE: 0.53130 , Pearson Correlation: 0.51384 with p-value: 0.0 , Concordance Index: 0.70860\n",
      "Training at Epoch 4 iteration 0 with loss 0.50197. Total time 0.55583 hours\n",
      "Training at Epoch 4 iteration 100 with loss 0.97383. Total time 0.58055 hours\n",
      "Training at Epoch 4 iteration 200 with loss 0.74706. Total time 0.60527 hours\n",
      "Training at Epoch 4 iteration 300 with loss 0.61534. Total time 0.63027 hours\n",
      "Training at Epoch 4 iteration 400 with loss 0.63400. Total time 0.65527 hours\n",
      "Training at Epoch 4 iteration 500 with loss 0.72008. Total time 0.68 hours\n",
      "Training at Epoch 4 iteration 600 with loss 0.65495. Total time 0.70472 hours\n",
      "Validation at Epoch 4 , MSE: 0.50166 , Pearson Correlation: 0.53950 with p-value: 0.0 , Concordance Index: 0.71986\n",
      "Training at Epoch 5 iteration 0 with loss 0.41625. Total time 0.72333 hours\n",
      "Training at Epoch 5 iteration 100 with loss 0.49403. Total time 0.74805 hours\n",
      "Training at Epoch 5 iteration 200 with loss 0.68507. Total time 0.77277 hours\n",
      "Training at Epoch 5 iteration 300 with loss 0.48458. Total time 0.7975 hours\n",
      "Training at Epoch 5 iteration 400 with loss 0.96469. Total time 0.82583 hours\n",
      "Training at Epoch 5 iteration 500 with loss 0.55342. Total time 0.85305 hours\n",
      "Training at Epoch 5 iteration 600 with loss 0.80672. Total time 0.87944 hours\n",
      "Validation at Epoch 5 , MSE: 0.48277 , Pearson Correlation: 0.56151 with p-value: 0.0 , Concordance Index: 0.72517\n",
      "Training at Epoch 6 iteration 0 with loss 0.65440. Total time 0.89972 hours\n",
      "Training at Epoch 6 iteration 100 with loss 0.49574. Total time 0.92666 hours\n",
      "Training at Epoch 6 iteration 200 with loss 0.65854. Total time 0.95333 hours\n",
      "Training at Epoch 6 iteration 300 with loss 0.61574. Total time 0.98194 hours\n",
      "Training at Epoch 6 iteration 400 with loss 0.57750. Total time 1.00944 hours\n",
      "Training at Epoch 6 iteration 500 with loss 0.56961. Total time 1.03805 hours\n",
      "Training at Epoch 6 iteration 600 with loss 0.54698. Total time 1.06611 hours\n",
      "Validation at Epoch 6 , MSE: 0.48746 , Pearson Correlation: 0.58950 with p-value: 0.0 , Concordance Index: 0.73456\n",
      "Training at Epoch 7 iteration 0 with loss 0.53380. Total time 1.08833 hours\n",
      "Training at Epoch 7 iteration 100 with loss 0.55768. Total time 1.11444 hours\n",
      "Training at Epoch 7 iteration 200 with loss 0.57367. Total time 1.14277 hours\n",
      "Training at Epoch 7 iteration 300 with loss 0.60914. Total time 1.17055 hours\n",
      "Training at Epoch 7 iteration 400 with loss 0.58427. Total time 1.19666 hours\n",
      "Training at Epoch 7 iteration 500 with loss 0.61994. Total time 1.22305 hours\n",
      "Training at Epoch 7 iteration 600 with loss 0.70587. Total time 1.25194 hours\n",
      "Validation at Epoch 7 , MSE: 0.46277 , Pearson Correlation: 0.60217 with p-value: 0.0 , Concordance Index: 0.73545\n",
      "Training at Epoch 8 iteration 0 with loss 0.62481. Total time 1.27194 hours\n",
      "Training at Epoch 8 iteration 100 with loss 0.60135. Total time 1.29916 hours\n",
      "Training at Epoch 8 iteration 200 with loss 0.61045. Total time 1.32611 hours\n",
      "Training at Epoch 8 iteration 300 with loss 0.46631. Total time 1.35305 hours\n",
      "Training at Epoch 8 iteration 400 with loss 0.57605. Total time 1.38083 hours\n",
      "Training at Epoch 8 iteration 500 with loss 0.55145. Total time 1.40666 hours\n",
      "Training at Epoch 8 iteration 600 with loss 0.88128. Total time 1.4325 hours\n",
      "Validation at Epoch 8 , MSE: 0.54907 , Pearson Correlation: 0.61538 with p-value: 0.0 , Concordance Index: 0.74227\n",
      "Training at Epoch 9 iteration 0 with loss 0.66877. Total time 1.45305 hours\n",
      "Training at Epoch 9 iteration 100 with loss 0.55947. Total time 1.47972 hours\n",
      "Training at Epoch 9 iteration 200 with loss 0.80887. Total time 1.50527 hours\n",
      "Training at Epoch 9 iteration 300 with loss 0.51789. Total time 1.53638 hours\n",
      "Training at Epoch 9 iteration 400 with loss 0.45560. Total time 1.56305 hours\n",
      "Training at Epoch 9 iteration 500 with loss 0.54652. Total time 1.58916 hours\n",
      "Training at Epoch 9 iteration 600 with loss 0.51819. Total time 1.61583 hours\n",
      "Validation at Epoch 9 , MSE: 0.41752 , Pearson Correlation: 0.63835 with p-value: 0.0 , Concordance Index: 0.74886\n",
      "Training at Epoch 10 iteration 0 with loss 0.53559. Total time 1.63583 hours\n",
      "Training at Epoch 10 iteration 100 with loss 0.42198. Total time 1.66277 hours\n",
      "Training at Epoch 10 iteration 200 with loss 0.74855. Total time 1.69055 hours\n",
      "Training at Epoch 10 iteration 300 with loss 0.56010. Total time 1.72138 hours\n",
      "Training at Epoch 10 iteration 400 with loss 0.64494. Total time 1.74861 hours\n",
      "Training at Epoch 10 iteration 500 with loss 0.40499. Total time 1.77416 hours\n",
      "Training at Epoch 10 iteration 600 with loss 0.38197. Total time 1.80527 hours\n",
      "Validation at Epoch 10 , MSE: 0.39831 , Pearson Correlation: 0.66272 with p-value: 0.0 , Concordance Index: 0.75689\n",
      "Training at Epoch 11 iteration 0 with loss 0.44420. Total time 1.83083 hours\n",
      "Training at Epoch 11 iteration 100 with loss 0.61767. Total time 1.86916 hours\n",
      "Training at Epoch 11 iteration 200 with loss 0.59886. Total time 1.90138 hours\n",
      "Training at Epoch 11 iteration 300 with loss 0.44023. Total time 1.93888 hours\n",
      "Training at Epoch 11 iteration 400 with loss 0.49944. Total time 1.98166 hours\n",
      "Training at Epoch 11 iteration 500 with loss 0.80822. Total time 2.02527 hours\n",
      "Training at Epoch 11 iteration 600 with loss 0.60348. Total time 2.06 hours\n",
      "Validation at Epoch 11 , MSE: 0.50192 , Pearson Correlation: 0.67912 with p-value: 0.0 , Concordance Index: 0.76373\n",
      "--- Go for Testing ---\n",
      "Up to Epoch 10 Testing MSE: 0.6241257333934345 , Pearson Correlation: 0.5794750762149183 with p-value: 0.0 , Concordance Index: 0.7073133872497231\n",
      "Training at Epoch 12 iteration 0 with loss 0.47469. Total time 2.095 hours\n",
      "Training at Epoch 12 iteration 100 with loss 0.53183. Total time 2.12861 hours\n",
      "Training at Epoch 12 iteration 200 with loss 0.45367. Total time 2.16027 hours\n",
      "Training at Epoch 12 iteration 300 with loss 0.39957. Total time 2.18583 hours\n",
      "Training at Epoch 12 iteration 400 with loss 0.55815. Total time 2.21361 hours\n",
      "Training at Epoch 12 iteration 500 with loss 0.45266. Total time 2.23916 hours\n",
      "Training at Epoch 12 iteration 600 with loss 0.44607. Total time 2.27666 hours\n",
      "Validation at Epoch 12 , MSE: 0.39189 , Pearson Correlation: 0.68264 with p-value: 0.0 , Concordance Index: 0.75734\n",
      "Training at Epoch 13 iteration 0 with loss 0.46262. Total time 2.30166 hours\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training at Epoch 13 iteration 100 with loss 0.40722. Total time 2.33388 hours\n",
      "Training at Epoch 13 iteration 200 with loss 0.44606. Total time 2.36583 hours\n",
      "Training at Epoch 13 iteration 300 with loss 0.34755. Total time 2.39638 hours\n",
      "Training at Epoch 13 iteration 400 with loss 0.36445. Total time 2.41861 hours\n",
      "Training at Epoch 13 iteration 500 with loss 0.39682. Total time 2.43777 hours\n",
      "Training at Epoch 13 iteration 600 with loss 0.41257. Total time 2.45666 hours\n",
      "Validation at Epoch 13 , MSE: 0.36162 , Pearson Correlation: 0.69767 with p-value: 0.0 , Concordance Index: 0.76840\n",
      "Training at Epoch 14 iteration 0 with loss 0.39781. Total time 2.47 hours\n",
      "Training at Epoch 14 iteration 100 with loss 0.39343. Total time 2.49111 hours\n",
      "Training at Epoch 14 iteration 200 with loss 0.40009. Total time 2.51388 hours\n",
      "Training at Epoch 14 iteration 300 with loss 0.66970. Total time 2.54305 hours\n",
      "Training at Epoch 14 iteration 400 with loss 0.37376. Total time 2.57083 hours\n",
      "Training at Epoch 14 iteration 500 with loss 0.42178. Total time 2.59333 hours\n",
      "Training at Epoch 14 iteration 600 with loss 0.68821. Total time 2.61416 hours\n",
      "Validation at Epoch 14 , MSE: 0.35647 , Pearson Correlation: 0.71057 with p-value: 0.0 , Concordance Index: 0.77446\n",
      "Training at Epoch 15 iteration 0 with loss 0.34164. Total time 2.63277 hours\n",
      "Training at Epoch 15 iteration 100 with loss 0.40583. Total time 2.66138 hours\n",
      "Training at Epoch 15 iteration 200 with loss 0.38328. Total time 2.69027 hours\n",
      "Training at Epoch 15 iteration 300 with loss 0.52372. Total time 2.71694 hours\n",
      "Training at Epoch 15 iteration 400 with loss 0.53249. Total time 2.74138 hours\n",
      "Training at Epoch 15 iteration 500 with loss 0.38095. Total time 2.76111 hours\n",
      "Training at Epoch 15 iteration 600 with loss 0.45153. Total time 2.78555 hours\n",
      "Validation at Epoch 15 , MSE: 0.38514 , Pearson Correlation: 0.71744 with p-value: 0.0 , Concordance Index: 0.77747\n",
      "Training at Epoch 16 iteration 0 with loss 0.44061. Total time 2.8025 hours\n",
      "Training at Epoch 16 iteration 100 with loss 0.42707. Total time 2.82611 hours\n",
      "Training at Epoch 16 iteration 200 with loss 0.35150. Total time 2.84833 hours\n",
      "Training at Epoch 16 iteration 300 with loss 0.58418. Total time 2.87194 hours\n",
      "Training at Epoch 16 iteration 400 with loss 0.47213. Total time 2.9 hours\n",
      "Training at Epoch 16 iteration 500 with loss 0.43227. Total time 2.92611 hours\n",
      "Training at Epoch 16 iteration 600 with loss 0.34077. Total time 2.94444 hours\n",
      "Validation at Epoch 16 , MSE: 0.34447 , Pearson Correlation: 0.71855 with p-value: 0.0 , Concordance Index: 0.78137\n",
      "Training at Epoch 17 iteration 0 with loss 0.41209. Total time 2.96111 hours\n",
      "Training at Epoch 17 iteration 100 with loss 0.50063. Total time 2.98611 hours\n",
      "Training at Epoch 17 iteration 200 with loss 0.41551. Total time 3.00888 hours\n"
     ]
    }
   ],
   "source": [
    "model.train(train, val, test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_model('./model_MPNN_AAC_Kiba')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
